
############################################################################
# Conditional logistic regression analysis of shadow rapporteur appointments
############################################################################

# Programme:  shdw.analysis07a-final.r
# Project:    EP shadow rapporteur selection
# Author:		  Frank Haege, Department of Politics and Administration, University of Limerick
# Contact:		frank.haege@ul.ie

# Description
#############
# This script estimates conditional logistic regression models of shadow rapporteur selection
# for the pooled sample and for individual party groups as presented in Table 2 of the article.
# It also produces a plot of the predicted values over the range of the similarity in policy interests
# variable (Figure 1 in the article), holding the other explanatory variables constant.
# Finally the script produces Table 1 showing the range and average choice set size of party groups,
# as well as other descriptive statistics referred to in the text.


# Remove all objects
rm(list = ls(all = TRUE))

# Load libraries
library(ggplot2)
library(ggpubr)
library(texreg)
library(survival)
library(data.table)

# Change default setting
options(stringsAsFactors = FALSE)
theme_set(theme_classic())

# Set working directory
setwd("C:\\Users\\Frank\\Dropbox\\European Union\\EP shadow rapporteurs\\EUP submission")

# Load data
load("Data analysis\\shdw-management05-clean.RData")


# Descriptive statistics
########################

# Which ideology variable has more variation?
summary((df$leftright-5)/5)
summary(df$leftright.wn)
sd((df$leftright-5)/5, na.rm=T)
sd(df$leftright.wn, na.rm=T)
sd((df$leftright[df$epg.accro=="EPP"]-5)/5, na.rm=T)
sd(df$leftright.wn[df$epg.accro=="EPP"], na.rm=T)
sd((df$leftright[df$epg.accro=="S&D"]-5)/5, na.rm=T)
sd(df$leftright.wn[df$epg.accro=="S&D"], na.rm=T)
sd((df$leftright[df$epg.accro=="GREENS"]-5)/5, na.rm=T)
sd(df$leftright.wn[df$epg.accro=="GREENS"], na.rm=T)

# How many proposals?
length(unique(df$proc.code))
# 471 co-decision proposals

# How many procedures by committee?
dt = data.table(unique(df[, c("com.accro", "proc.code")]))
dt[ , .(count = .N), by = list(com.accro)]

# How many reports?
length(unique(df$doc.no))
# 491 co-decision reports

# How many reports by committee?
dt = data.table(unique(df[, c("com.accro", "doc.no")]))
dt[ , .(count = .N), by = list(com.accro)]
# Generally more reports than procedures (because of several readings)

# How many potential shadow rapporteurs per proposal and party group?
dt = data.table(df[, c("com.accro", "doc.no", "epg.accro")])
dt = dt[ , .(count = .N), by = list(com.accro, doc.no, epg.accro)]
dt[ , .(min = min(count), avge = mean(count), max = max(count)), by = list(com.accro, epg.accro)]


# Table 1: Choice set size by party group
#########################################

# Table minimum, mean, and maximum number of party group members in a commmittee-report context
dt[ , .(min = min(count), avge = mean(count), max = max(count)), by = list(epg.accro)]


# How often do EFD MEPs shadow?
dt = data.table(df[df$epg.accro=="EFD", ])
dt[ , .(count = .N, sum = sum(shadow)), by = list(mep.id)]
rm(dt)

# How many choice sets?
length(unique(df$rep.epg))

# How often is distance to rapporteur larger than distance to shadow?
closer.eu.position = df$diff.rappmed.eu.position-df$diff.median.eu.position
hist(closer.eu.position)
closer.eu.position.yes = closer.eu.position>=0
prop.table(table(closer.eu.position.yes))

closer.leftright = df$diff.rappmed.leftright - df$diff.median.leftright 
hist(closer.leftright)
closer.leftright.yes = closer.leftright>=0
prop.table(table(closer.leftright.yes))

closer.galtan = df$diff.rappmed.galtan - df$diff.median.galtan 
closer.galtan.yes = closer.galtan>=0
hist(closer.galtan)
prop.table(table(closer.galtan.yes))
rm(closer.eu.position, closer.eu.position.yes, closer.galtan, closer.galtan.yes,
   closer.leftright, closer.leftright.yes)


# Table 2: Conditional logistic regression analysis of shadow rapporteur appointments
#####################################################################################

# Pooled model
res1 = clogit(shadow ~ 
               joint.country + 
               ccc.all +
               term.no +
               diff.median.leftright +
               diff.median.eu.position +
               diff.median.galtan +
               diff.rapp.leftright + 
               diff.rapp.eu.position + 
               diff.rapp.galtan + 
               com.leader + 
               com.sub + 
               log.npd.share + 
               epg.leader +
               absence + 
               strata(rep.epg), data=df)

# GUE-NGL
res2 = clogit(shadow ~ 
               joint.country + 
               ccc.all +
               term.no +
               diff.median.leftright +
               diff.median.eu.position +
               diff.median.galtan +
               diff.rapp.leftright + 
               diff.rapp.eu.position + 
               diff.rapp.galtan + 
               com.leader + 
               com.sub + 
               log.npd.share + 
               epg.leader +
               absence + 
               strata(rep.epg), data=df[df$epg.accro=="GUE-NGL", ])

# Greens
res3 = clogit(shadow ~ 
               joint.country + 
               ccc.all +
               term.no +
               diff.median.leftright +
               diff.median.eu.position +
               diff.median.galtan +
               diff.rapp.leftright + 
               diff.rapp.eu.position + 
               diff.rapp.galtan + 
               com.leader + 
               com.sub + 
               log.npd.share + 
               epg.leader +
               absence + 
               strata(rep.epg), data=df[df$epg.accro=="GREENS", ])

# S&D
res4 = clogit(shadow ~ 
               joint.country + 
               ccc.all +
               term.no +
               diff.median.leftright +
               diff.median.eu.position +
               diff.median.galtan +
               diff.rapp.leftright + 
               diff.rapp.eu.position + 
               diff.rapp.galtan + 
               com.leader + 
               com.sub + 
               log.npd.share + 
               epg.leader +
               absence + 
               strata(rep.epg), data=df[df$epg.accro=="S&D", ])

# ALDE
res5 = clogit(shadow ~ 
               joint.country + 
               ccc.all +
               term.no +
               diff.median.leftright +
               diff.median.eu.position +
               diff.median.galtan +
               diff.rapp.leftright + 
               diff.rapp.eu.position + 
               diff.rapp.galtan + 
               com.leader + 
               com.sub + 
               log.npd.share + 
               epg.leader +
               absence + 
               strata(rep.epg), data=df[df$epg.accro=="ALDE", ])

# EPP
res6 = clogit(shadow ~ 
               joint.country + 
               ccc.all +
               term.no +
               diff.median.leftright +
               diff.median.eu.position +
               diff.median.galtan +
               diff.rapp.leftright + 
               diff.rapp.eu.position + 
               diff.rapp.galtan + 
               com.leader + 
               com.sub + 
               log.npd.share + 
               epg.leader +
               absence + 
               strata(rep.epg), data=df[df$epg.accro=="EPP", ])

# ECR
res7 = clogit(shadow ~ 
               joint.country + 
               ccc.all +
               term.no +
               diff.median.leftright +
               diff.median.eu.position +
               diff.median.galtan +
               diff.rapp.leftright + 
               diff.rapp.eu.position + 
               diff.rapp.galtan + 
               com.leader + 
               com.sub + 
               log.npd.share + 
               epg.leader +
               absence + 
               strata(rep.epg), data=df[df$epg.accro=="ECR", ])

# Print results with regression coefficients to screen
screenreg(list(res1, res2, res3, res4, res5, res6, res7), omit.coef = "(com.accro)|(epg.accro)",
          custom.model.names=c("Full Sample", levels(df$epg.accro)[1:6]), 
          include.rsquared = F, include.maxrs = F)

# Print results with odds ratios to screen
screenreg(list(res1, res2, res3, res4, res5, res6, res7),
          custom.model.names=c("Full Sample", levels(df$epg.accro)[1:6]),
          override.coef=list(exp(coef(res1)), exp(coef(res2)), exp(coef(res3)),
                             exp(coef(res4)), exp(coef(res5)), exp(coef(res6)),
                             exp(coef(res7))),
          override.se=list(exp(coef(res1))*sqrt(diag(res1$var)), 
                           exp(coef(res2))*sqrt(diag(res2$var)), 
                           exp(coef(res3))*sqrt(diag(res3$var)),
                           exp(coef(res4))*sqrt(diag(res4$var)), 
                           exp(coef(res5))*sqrt(diag(res5$var)), 
                           exp(coef(res6))*sqrt(diag(res6$var)),
                           exp(coef(res7))*sqrt(diag(res7$var))), 
          include.rsquared = F, include.maxrs = F)

# Export table
htmlreg(list(res1, res2, res3, res4, res5, res6, res7), 
        custom.coef.map = list(
          "diff.median.leftright"="Left-Right",
          "diff.median.eu.position"="EU Support",
          "diff.median.galtan"="GAL/TAN",
          "diff.rapp.leftright"="Rapporteur Left-Right",
          "diff.rapp.eu.position"="Rapporteur EU Support",
          "diff.rapp.galtan"="Rapporteur GAL/TAN",        
          "joint.country"="Similar National Salience",
          "ccc.all"="Similar Policy Interests",
          "term.no"="Seniority",
          "com.leader"="Committee Leader",
          "log.npd.share"="National Delegation Size",
          "com.sub"="Substitute Member",
          "epg.leader"="Party Group Leader",
          "absence"="Vote Absenteeism"),
        digits=2,
        override.coef=list(exp(coef(res1)), exp(coef(res2)), exp(coef(res3)),
                           exp(coef(res4)), exp(coef(res5)), exp(coef(res6)),
                           exp(coef(res7))),
        override.se=list(exp(coef(res1))*sqrt(diag(res1$var)), 
                         exp(coef(res2))*sqrt(diag(res2$var)), 
                         exp(coef(res3))*sqrt(diag(res3$var)),
                         exp(coef(res4))*sqrt(diag(res4$var)), 
                         exp(coef(res5))*sqrt(diag(res5$var)), 
                         exp(coef(res6))*sqrt(diag(res6$var)),
                         exp(coef(res7))*sqrt(diag(res7$var))),
        custom.model.names=c("Full Sample", "GUE-NGL", "Greens", "S&D", "ALDE", "EPP", "ECR"), 
        include.rsquared = F, include.maxrs = F,
        file="Data analysis\\Tables\\shdw-analysis01a-final.doc")


# Figure 1: Effect of similar policy interests on pobability of shadow rapporteurship
#####################################################################################

# Estimate pooled conditional logit model
res = clogit(shadow ~ 
                 joint.country + 
                 ccc.all +
                 term.no + 
                 diff.median.leftright +
                 diff.median.eu.position +
                 diff.median.galtan +
                 diff.rapp.leftright + 
                 diff.rapp.eu.position + 
                 diff.rapp.galtan + 
                 com.leader + 
                 com.sub + 
                 log.npd.share + 
                 epg.leader +
                 absence + 
                 strata(rep.epg), data=df)
summary(res)

# Create new data for prediction
# Set all other quantitative variables to their mean and dummy variables to zero
# The strata is set to S&D and A7-0151/2013: Proposal for a regulation of the European Parliament and of the Council 
# amending Regulation (EC) No 443/2009 to define the modalities for reaching the 2020 target to reduce CO2 emissions 
# from new passenger cars
newdata = data.frame(joint.country = 0,
                       ccc.all = seq(from=-9, to=74, length.out=100),
                       term.no = mean(df$term.no, na.rm=T),
                       diff.median.leftright = mean(df$diff.median.leftright, na.rm=T),
                       diff.median.eu.position = mean(df$diff.median.eu.position, na.rm=T),
                       diff.median.galtan = mean(df$diff.median.galtan, na.rm=T),
                       diff.rapp.leftright = mean(df$diff.rapp.leftright, na.rm=T),
                       diff.rapp.eu.position = mean(df$diff.rapp.eu.position, na.rm=T),
                       diff.rapp.galtan = mean(df$diff.rapp.galtan, na.rm=T),        
                       com.leader = mean(df$com.leader, na.rm=T),
                       log.npd.share = mean(df$log.npd.share, na.rm=T),
                       com.sub = 0,
                       epg.leader = 0,
                       absence = mean(df$absence, na.rm=T),
                       rep.epg = "A7-0151/2013.ENVI.S&D")

# Predicted values 
# (see https://markmail.org/search/?q=list%3Aorg.r-project.r-help+predict+clogit#query:list%3Aorg.r-project.r-help%20predict%20clogit%20from%3A%22Therneau%2C%20Terry%20M.%2C%20Ph.D.%22+page:1+mid:tsbl3cbnxywkafv6+state:results)
lin.pred = predict(res, newdata, type="lp", se.fit=T)

# Create confidence interval
upr = lin.pred$fit + (1.96 * lin.pred$se.fit)
lwr = lin.pred$fit - (1.96 * lin.pred$se.fit)
fit = lin.pred$fit

# Transform predicted values and confidence intervals using link function
newdata$fit = (exp(fit)/(1+exp(fit)))*100
newdata$upr = (exp(lwr)/(1+exp(lwr)))*100
newdata$lwr = (exp(upr)/(1+exp(upr)))*100
rm(fit, lwr, upr, lin.pred)

# Create data for rug plot
hist(df$ccc.all, probability = T, breaks=100)
tail(df$ccc.all[order(df$ccc.all)], 20)
cat = hist(df$ccc.all, probability = T, breaks=100)
h = data.frame("freq"=cat$counts, "prop"=cat$density*100, "mids"=cat$mids, "y"=0)
rm(cat)

# Plot effect of similarity of policy interests on probability of shadow rapporteur selection
p = ggplot() + 
  geom_line(data=newdata, mapping=aes(x=ccc.all, y=fit), col="black", size=1) +
  geom_segment(data=h, size=1.1, show.legend=FALSE,
               aes(x=mids, xend=mids, y=y, yend=prop), colour="black", alpha=0.6) +
  geom_ribbon(data=newdata,aes(x=ccc.all, ymin=lwr,ymax=upr),alpha=0.1) +
  ylab("Probability of selection (%)") +
  ylim(0, 100) +
  xlab("Similarity of policy interests") +
  theme_classic(base_size=20)
p

# Export figure
ggexport(p, filename="Data analysis\\Figures\\shdw-analysis01a-final.png", width=500, height=500)

# What is the predicted value for the 5th and 95th percentile of policy similarity?
quantile(df$ccc.all)
quantile(df$ccc.all, prob=c(.05, .95))

